library(mvtnorm)
mcmc_MH <- function(Niter, proposal_sigma, mu_init, LL){
  # Random-walk Metropolis-Hastings sampler.
  #
  # Niter:          number of MH iterations
  # proposal_sigma: standard deviation of the isotropic Gaussian random-walk proposal
  # mu_init:        starting point (numeric vector of length d)
  # LL:             function returning the log target density at a point
  #
  # Returns a list with
  #   samples:    (Niter + 1) x d matrix of chain states (row 1 = mu_init)
  #   acceptance: empirical acceptance rate
  mu_new <- mu_init
  d <- length(mu_new)
  # Preallocate the trajectory instead of growing it with rbind inside the
  # loop (the original rbind-append was O(Niter^2) in copies).
  samples_MH <- matrix(0, nrow = Niter + 1, ncol = d)
  samples_MH[1, ] <- mu_new
  accept_MH <- 0
  for (iter in seq_len(Niter)) {
    # Isotropic Gaussian random-walk proposal; it is symmetric, so the
    # proposal density cancels in the acceptance ratio.
    if (d > 1) {
      propose_MH_mu <- as.numeric(rmvnorm(1, mean = mu_new,
                                          sigma = diag(rep(proposal_sigma^2, d))))
    } else {
      propose_MH_mu <- rnorm(1, mean = mu_new, sd = proposal_sigma)
    }
    # Log acceptance ratio (Metropolis rule for a symmetric proposal).
    rtemp <- LL(propose_MH_mu) - LL(mu_new)
    if (runif(1) < exp(rtemp)) {
      mu_new <- propose_MH_mu
      accept_MH <- accept_MH + 1
    }
    samples_MH[iter + 1, ] <- mu_new
  }
  return(list(samples = samples_MH, acceptance = accept_MH / Niter))
}
# Simulate data from a probit regression model with known parameter.
beta <- matrix(c(2, -1), ncol = 1) # true parameter, stored as a 2x1 matrix
n <- 100 # sample size
X <- cbind(rnorm(n, mean=1), rnorm(n, mean=1.5)) # matrix of explanatory variables
Y <- as.numeric(pnorm(X%*%beta) >= runif(n)) # binary observations: P(Y_i = 1) = pnorm(x_i' beta)
table(Y)
## Y
## 0 1
## 36 64
# Gaussian prior beta ~ N(0, B) with B = 3 * I.
B <- matrix(c(3, 0, 0, 3), ncol = 2)
Binv <- solve(B) # prior precision (inverse of the prior covariance B)
library(ggplot2)
# Log-density (up to an additive constant) of the N(0, B) Gaussian prior on
# beta. Reads the precision matrix Binv from the global environment and
# returns a 1x1 matrix.
log_prior <- function(beta){
  quad_form <- t(beta) %*% Binv %*% beta
  return(-0.5 * quad_form)
}
# Log posterior density (up to a constant) for the probit regression model:
# probit log-likelihood of the binary observations plus the Gaussian
# log-prior on beta. Reads X and Y (and, via log_prior, Binv) from the
# global environment.
full_log_posterior_probit <- function(beta){
  eta <- X %*% beta  # linear predictor for every observation
  loglik_ones  <- sum(pnorm(q=eta[Y == 1], mean=0, sd=1, log.p=TRUE))
  loglik_zeros <- sum(pnorm(q=-eta[Y == 0], mean=0, sd=1, log.p=TRUE))
  return(loglik_ones + loglik_zeros + log_prior(beta))
}
# Evaluate the unnormalized posterior on a 100x100 grid around the true
# parameter value to visualize the target density.
grid.df <- expand.grid(seq(from=0.5,to=3.5,length.out=100), seq(from=-2,to=0,length.out=100))
names(grid.df) <- c("beta1", "beta2")
# Unnormalized posterior density at each grid point.
grid.df$density <- mapply(FUN=function(x,y) exp(full_log_posterior_probit(matrix(c(x,y),ncol=1))), grid.df$beta1, grid.df$beta2)
g2d <- ggplot(grid.df) + geom_raster(aes(x=beta1, y=beta2, fill=density))
g2d <- g2d + xlab(expression(beta[1])) + ylab(expression(beta[2]))
g2d <- g2d + theme(legend.position = "none")
print(g2d)
# Run the random-walk MH sampler on the probit posterior.
Niter = 10000
proposal_sigma = 0.3 # 0.3 # try other values
mu_init = rnorm(2) # random starting point; try other values
samplesMH = mcmc_MH (Niter, proposal_sigma, mu_init, full_log_posterior_probit)
# 2-d trace of the chain, with the starting point marked in red.
plot(samplesMH$samples, type = 'l', xlab = '', ylab = '')
points(t(mu_init), cex = 2, col = 'red')
# Per-component trace plots and marginal histograms.
par(mfrow = c(2, 2))
for(k in 1:2){
plot(samplesMH$samples[, k], type = 'l', xlab = '', ylab = '')
hist(samplesMH$samples[, k], xlab = '', main = '')
}
print(samplesMH$acceptance)
## [1] 0.2847
# 2-d kernel density estimate of the MH samples.
gMH2d <- ggplot(data.frame(X1 = samplesMH$samples[, 1], X2 = samplesMH$samples[, 2]), aes(x=X1, y=X2)) + geom_density2d()
gMH2d <- gMH2d+ xlab(expression(beta[1])) + ylab(expression(beta[2]))
print(gMH2d)
# Gibbs sampling for Probit regression
library(truncnorm)
# we first implement the conditionals
# Data-augmentation step: sample the latent variables Z given beta.
# Z_i | beta ~ N(x_i' beta, 1) truncated to (0, Inf) when Y_i = 1 and to
# (-Inf, 0) when Y_i = 0. Reads n, X, Y from the global environment.
ZcondBeta <- function(beta){
  ones <- (Y == 1)
  z <- rep(0, n)
  # Positive-part truncated normals for the successes...
  z[ones] <- rtruncnorm(n=sum(ones), mean= X[ones,] %*% beta, sd=1, a=0)
  # ...and negative-part truncated normals for the failures.
  z[!ones] <- rtruncnorm(n=sum(!ones), mean= X[!ones,] %*% beta, sd=1, b=0)
  return(z)
}
# Conjugate Gaussian update: sample beta given the latent variables Z,
# beta | Z ~ N(V t(X) Z, V) with V = (B^{-1} + t(X) X)^{-1}.
# Reads B and X from the global environment; returns a column vector.
BetacondZ <- function(Z){
  prior_precision <- solve(B)
  posterior_var <- solve(prior_precision + t(X) %*% X)
  posterior_mean <- posterior_var %*% (t(X) %*% Z)
  return(t(rmvnorm(n=1, mean=posterior_mean, sigma=posterior_var)))
}
# then run the systematic Gibbs sampler
niterations <- Niter
# starting point for the Markov chain
# NOTE(review): the original comment claimed this reuses the MH starting
# point, but a fresh rnorm(2) is drawn here; use mu_init to actually match.
current_beta <- matrix(rnorm(2), ncol = 1)
# matrix storing the whole trajectory of the Markov chain; every row is
# initialized to the starting point and row t is overwritten at iteration t
# (row 1 keeps the starting value).
beta_chain <- matrix(rep(current_beta, niterations), nrow=niterations, byrow=TRUE)
# at each iteration: alternate the two conditional draws (data augmentation)
for (t in 2:niterations){
current_z <- ZcondBeta(current_beta)
current_beta <- BetacondZ(current_z)
beta_chain[t,] <- current_beta
}
Gibbschain.df <- data.frame(beta_chain)
Gibbschain.df$step <- 1:niterations
# Path of the first 1000 Gibbs steps in the (beta1, beta2) plane.
gGibbs <- ggplot(subset(Gibbschain.df, step < 1000), aes(x=X1, y=X2)) + geom_path() + geom_point()
gGibbs <- gGibbs + xlab(expression(beta[1])) + ylab(expression(beta[2]))
print(gGibbs)
# 2-d kernel density estimate of the Gibbs samples.
gGibbs2d <- ggplot(Gibbschain.df, aes(x=X1, y=X2)) + geom_density2d()
gGibbs2d <- gGibbs2d+ xlab(expression(beta[1])) + ylab(expression(beta[2]))
print(gGibbs2d)
# compare MH and Gibbs via the autocorrelation of the first component
library(reshape2)
bacf <- acf(Gibbschain.df[,1], plot = FALSE) # compute ACF without plot
bacfdf <- with(bacf, data.frame(lag, acf)) # turn into data frame
names(bacfdf) <- c("Lag", "Gibbs") # rename the columns
bacfMH <- acf(samplesMH$samples[,1], plot = FALSE) # same for MH
bacfMH <- with(bacfMH, data.frame(lag, acf))
names(bacfMH) <- c("Lag", "MH")
ACF <- merge(bacfdf, bacfMH, by = "Lag") # merge the data frame
ACF <- melt(ACF, id = "Lag") # reshape the data frame for ggplot2
# offset the MH lags by 0.5 so the two samplers' segments don't overlap
ACF$Lag[which(ACF$variable == 'MH')] = ACF$Lag[which(ACF$variable == 'MH')]+0.5
gACF <- ggplot(data=ACF, mapping=aes(x=Lag, y = value, colour = variable))
gACF <- gACF + geom_segment(mapping = aes(xend = Lag, yend = 0), size =1)
gACF <- gACF + ylab("ACF") + scale_color_discrete(name = "Sampler")
print(gACF)
# STAN: fit the same probit model with Hamiltonian Monte Carlo (NUTS)
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.17.3, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
rstan_options(auto_write = TRUE) # cache compiled Stan models on disk
options(mc.cores = parallel::detectCores()) # run the chains in parallel
# data list matching the declarations in probit.stan
probit_dat <- list(N = n, K = 2, y = Y, x = X)
fit <- stan(file = 'probit.stan', data = probit_dat,
iter = 1000, chains = 4)
print(fit)
## Inference for Stan model: probit.
## 4 chains, each with iter=1000; warmup=500; thin=1;
## post-warmup draws per chain=500, total post-warmup draws=2000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta[1] 1.91 0.02 0.35 1.30 1.66 1.89 2.13 2.64 434 1.01
## beta[2] -0.92 0.01 0.20 -1.33 -1.04 -0.91 -0.78 -0.55 434 1.01
## lp__ -31.13 0.04 1.04 -33.91 -31.51 -30.80 -30.40 -30.16 555 1.01
##
## Samples were drawn using NUTS(diag_e) at Mon Mar 12 11:06:15 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
# load data: the diabetes benchmark data set shipped with lars
library(lars)
## Loaded lars 1.2
data("diabetes")
d_x <- diabetes$x
d_y <- diabetes$y
# calculate LASSO results (frequentist reference fit)
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-10
lasso_results <- glmnet(d_x, d_y, intercept=FALSE)
# point estimates at penalty s = 0.237; used later as red reference lines
lasso_coef <- coef(lasso_results, s = 0.237)
d_x <- diabetes$x
# center covariates and response so no intercept is needed
# (the value 1 acts as TRUE in the if below)
bool_centralize <- 1
if(bool_centralize){
d_x <- apply(d_x, 2, function(xx) xx-mean(xx))
d_y <- d_y - mean(d_y)
}
library(mvtnorm)
# Log posterior density (up to a constant) for the Bayesian LASSO,
# parameterized by log(sigma^2):
#   y | beta, sigma^2 ~ N(x beta, sigma^2 I)
#   beta_j | sigma^2  ~ Laplace with scale sqrt(sigma^2)/lambda (Park & Casella)
#
# x:            n x p design matrix
# y:            response vector of length n
# beta:         coefficient vector of length p
# log_sigma_sq: log of the noise variance sigma^2
# lambda:       LASSO penalty parameter
# p:            number of coefficients
calc_pi <- function(x, y, beta, log_sigma_sq, lambda, p) {
  sigma <- sqrt(exp(log_sigma_sq))
  # Gaussian log-likelihood. dnorm takes the standard deviation, not the
  # variance: the original passed exp(log_sigma_sq) = sigma^2 as sd, which
  # is inconsistent with the lambda/sqrt(exp(log_sigma_sq)) prior term below.
  loglik <- sum(dnorm(y, x %*% beta, rep(sigma, length(y)), log = TRUE))
  # Conditional Laplace log-prior of beta given sigma^2.
  logprior <- p * log(lambda / 2) - p / 2 * log_sigma_sq -
    lambda / sigma * sum(abs(beta))
  loglik + logprior
}
# Metropolis-Hastings sampler for the Bayesian LASSO over (beta, log sigma^2).
p <- ncol(d_x)
lambda = 0.237
beta_draws <- matrix(1, nrow = p) # column i holds draw i; chain starts at beta = 1
log_sigma_sq_draws <- c(0)
num_draws = 20000
accept = 0
set.seed(12345)
for (i in 1:num_draws) {
# random-walk proposal for log(sigma^2)
log_sigma_sq = rnorm(1, log_sigma_sq_draws[i], 0.1)
# Gaussian proposal for beta, scaled by the *proposed* variance.
# NOTE(review): this makes the joint proposal asymmetric (the reverse move
# would use the current variance), yet the acceptance ratio below contains
# only target densities — strictly, the proposal correction is missing.
beta <- t(rmvnorm(1, beta_draws[, i], exp(log_sigma_sq) * lambda * diag(p)))
# log ratio of target densities at proposed vs current state
accept_ratio = calc_pi(d_x, d_y, beta, log_sigma_sq, lambda, p) - calc_pi(d_x, d_y, beta_draws[, i], log_sigma_sq_draws[i], lambda, p)
if (runif(1) <= exp(accept_ratio)) {
accept = accept + 1
beta_draws <- cbind(beta_draws, beta)
log_sigma_sq_draws <- c(log_sigma_sq_draws, log_sigma_sq)
} else {
# rejected: repeat the current state
beta_draws <- cbind(beta_draws, beta_draws[, i])
log_sigma_sq_draws <- c(log_sigma_sq_draws, log_sigma_sq_draws[i])
}
}
#Acceptance ratio
accept / num_draws
## [1] 0.3657
#Trace plots and histograms
plot(log_sigma_sq_draws, type = "l")
plot(beta_draws[1,], type = "l")
for(j in 1:dim(beta_draws)[1]){
# histogram of each coefficient, with the glmnet LASSO estimate in red
hist(beta_draws[j,], 20, main = row.names(lasso_coef)[j+1], xlab = paste("beta", row.names(lasso_coef)[j+1]))
abline(v = lasso_coef[j+1], col = "red")
}
beta_mh <- beta_draws
# posterior means over the second half of the chain (first half as burn-in)
rowMeans(beta_draws[, 10002:20001])
## [1] 1.526401 -95.568840 471.417709 181.756276 -20.874760
## [6] -20.626609 -205.339334 18.891134 486.850784 44.253492
if (!require(statmod)) {
install.packages("statmod")
library(statmod)
}
## Loading required package: statmod
# Gibbs sampler for the Bayesian LASSO (Park & Casella style), using the
# scale-mixture-of-normals representation of the Laplace prior with latent
# scales tau.
d_x <- diabetes$x
d_y <- diabetes$y
cd_x <- apply(d_x, 2, function(col) {col - mean(col)})
cd_y <- d_y - mean(d_y)
n = length(cd_y)
p <- ncol(d_x)
lambda = 0.237
sigma_sq_draws <- c(1)
beta_draws <- matrix(1, nrow = p)
tau_draws <- beta_draws
num_draws = 20000
accept = 0
set.seed(12345)
for (i in 1:num_draws) {
# sigma^2 | beta, tau via 1/Gamma (inverse-gamma draw).
# NOTE(review): rgamma's second positional argument is the rate; Park &
# Casella's conditional has rate (||y - X beta||^2 + sum(beta^2/tau)) / 2,
# so the division by 2 appears to be missing here — confirm against the
# reference before relying on these draws.
sigma_sq_draws <- c(sigma_sq_draws,
1 / rgamma(1, (n + p - 1) / 2,
sum((cd_y - cd_x %*% beta_draws[,i])^2) +
sum(beta_draws[, i]^2 / tau_draws[, i])))
# beta | sigma^2, tau ~ N(m^{-1} X'y, sigma^2 m^{-1}), m = X'X + diag(1/tau)
m <- (t(cd_x) %*% cd_x + diag(1 / tau_draws[, i]))
beta_draws <- cbind(beta_draws,
t(rmvnorm(1, solve(m, t(cd_x) %*% cd_y), solve(m) *
sigma_sq_draws[i + 1])))
# tau_j | beta, sigma^2: 1/tau_j is inverse-Gaussian, drawn per coefficient
tau_draws <- cbind(tau_draws,
unlist(sapply(beta_draws[, (i + 1)], function(beta) {
1 / rinvgauss(1, sqrt(lambda^2 * sigma_sq_draws[i + 1] / beta^2), lambda^2)
})))
}
#Trace plots
plot(sigma_sq_draws, type = "l")
plot(beta_draws[1,], type = "l")
plot(tau_draws[1,], type = "l")
for(j in 1:dim(beta_draws)[1]){
# histogram of each coefficient, with the glmnet LASSO estimate in red
hist(beta_draws[j,], 20, main = row.names(lasso_coef)[j+1], xlab = paste("beta", row.names(lasso_coef)[j+1]))
abline(v = lasso_coef[j+1], col = "red")
}
beta_gibbs <- beta_draws
library(glmnet)
library(lars)
library(rstan)
data("diabetes")
rstan_options(auto_write = TRUE) # cache compiled Stan models on disk
options(mc.cores = parallel::detectCores()) # run the chains in parallel
d_x <- diabetes$x
d_y <- diabetes$y
# Bayesian LASSO via Stan (model defined in bayes_lasso.stan)
data_list <- list("n" = length(d_y), "p" = ncol(d_x), "lambda" = 0.237, "x" = d_x, "y" = d_y)
stan_lasso <- stan(file = "bayes_lasso.stan", data = data_list)
# posterior means of all parameters
summary(stan_lasso)$summary[, "mean"]
## sigma beta[1] beta[2] beta[3] beta[4] beta[5]
## 161.98754 2.01220 -190.84682 503.54231 290.08816 -121.86006
## beta[6] beta[7] beta[8] beta[9] beta[10] lp__
## -42.10331 -169.62262 106.81965 475.71550 74.05703 -2553.96496
samples_stan <- extract(stan_lasso)
# transpose so rows index coefficients, matching the beta_mh/beta_gibbs layout
beta_draws_stan <- t(samples_stan$beta)
for(j in 1:dim(beta_draws_stan)[1]){
# histogram of each coefficient, with the glmnet LASSO estimate in red
hist(beta_draws_stan[j,], 20, main = row.names(lasso_coef)[j+1], xlab = paste("beta", row.names(lasso_coef)[j+1]))
abline(v = lasso_coef[j+1], col = "red")
}
library(reshape2)
# Compare the autocorrelation of the MH, Gibbs, and Stan draws for each of
# the 10 coefficients (one plot per coefficient); lags are offset by
# sampler so the vertical segments don't overlap.
for(j in 1:10){
acfgibbs <- acf(beta_gibbs[j, ], plot = FALSE)
acfgibbsdf <- with(acfgibbs, data.frame(lag, acf))
names(acfgibbsdf) <- c("Lag", "Gibbs")
acfmh <- acf(beta_mh[j, ], plot = FALSE)
acfmhdf <- with(acfmh, data.frame(lag, acf))
names(acfmhdf) <- c("Lag", "MH")
acfstan <- acf(beta_draws_stan[j, ], plot = FALSE)
acfstandf <- with(acfstan, data.frame(lag, acf))
names(acfstandf) <- c("Lag", "STAN")
ACF <- merge(acfmhdf, acfgibbsdf, by = "Lag")
ACF <- merge(ACF, acfstandf, by = "Lag")
ACF <- melt(ACF, id = "Lag")
ACF$Lag[which(ACF$variable == 'MH')] = ACF$Lag[which(ACF$variable == 'MH')]+0.34
ACF$Lag[which(ACF$variable == 'Gibbs')] = ACF$Lag[which(ACF$variable == 'Gibbs')]+0.7
gACF <- ggplot(data=ACF, mapping=aes(x=Lag, y = value, colour = variable))
gACF <- gACF + geom_segment(mapping = aes(xend = Lag, yend = 0), size =1)
gACF <- gACF + ylab("ACF") + scale_color_discrete(name = "Sampler")
print(gACF)
}
# Same comparison restricted to Gibbs vs Stan (MH mixes far slower, which
# compresses the scale in the previous plots).
for(j in 1:10){
acfgibbs <- acf(beta_gibbs[j, ], plot = FALSE)
acfgibbsdf <- with(acfgibbs, data.frame(lag, acf))
names(acfgibbsdf) <- c("Lag", "Gibbs")
acfstan <- acf(beta_draws_stan[j, ], plot = FALSE)
acfstandf <- with(acfstan, data.frame(lag, acf))
names(acfstandf) <- c("Lag", "STAN")
ACF <- merge(acfgibbsdf, acfstandf, by = "Lag")
ACF <- melt(ACF, id = "Lag")
ACF$Lag[which(ACF$variable == 'Gibbs')] = ACF$Lag[which(ACF$variable == 'Gibbs')]+0.5
gACF <- ggplot(data=ACF, mapping=aes(x=Lag, y = value, colour = variable))
gACF <- gACF + geom_segment(mapping = aes(xend = Lag, yend = 0), size =1)
gACF <- gACF + ylab("ACF") + scale_color_discrete(name = "Sampler")
print(gACF)
}